
/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Last Modified on: 2/21/2024

Description: This file will use NCANDS data to create contour plots in figure A2 and Figure 4

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/



********************************************************************************
* Directories
global rawpath “raw_directory” // raw data directory, edit before running
global main “main_directory” // project directory, edit before running

global intpath "${main}/data/int"
global cleanpath "${main}/data/clean"
global outpath "${main}/output"

cap mkdir "${main}/data"

cap mkdir "${intpath}"
cap mkdir "${cleanpath}"
cap mkdir "${outpath}"

* Program Switches
local import = 0 // 1 if importing data
local clean = 0 // 1 to clean
local makelongstate = 1 // 1 to produce the long state-year dataset

********************************************************************************


**************************
**  Load in Data
**************************

if `import' == 1 {

**************************
** Load in Data
**************************

use "${rawpath}/2008_Data/Data/Stata Files/CF2008v6.dta", clear

append using "${rawpath}/2009_Data/Data/Stata/CF2009v7.dta"

append using "${rawpath}/2010_Data/Data/Stata Files/CF2010v6.dta"

append using "${rawpath}/2011_Data/Data/Stata Files/CF2011v6.dta"

append using "${rawpath}/2012_Data/Data/Stata Files/CF2012v6.dta"

tostring fcdchdt, replace // fcdchdt was blank up to here

append using "${rawpath}/2013_Data/Data/Stata/CF2013v6.dta"

append using "${rawpath}/2014_Data/Data/Stata/CF2014v5.dta"

append using "${rawpath}/2015_Data/Data/Stata Files/CF2015v5.dta"

append using "${rawpath}/2016_Data/Data/Stata Files/CF2016v4.dta"

append using "${rawpath}/2017_Data/220 Data/Stata Files/CF2017v4.dta"

append using "${rawpath}/2018_Data/233 Data/Stata Files/CF2018v4.dta"

append using "${rawpath}/2019_Data/237 Data/Stata Files/CF2019v5.dta"

append using "${rawpath}/2020_Data/Data/Stata Files/CF2020v3.dta"

append using "${rawpath}/2021_Data/Data/Stata Files/CF2021v2.dta"


* Dates
foreach var in rptdt rpdispdt { // harmonize the pre-2002 dates with the rest of the data
gen `var'_temp = date(`var', "YMD")
drop `var'
rename `var'_temp `var'
format `var' %td
}

* Align dates: reports happen Jan 2008-Dec 2020, with followup data through 2021
gen rptyr = year(rptdt)
tab rptyr
drop if rptyr<2008 | rptyr > 2021


* Tagging Suspect Child ID's

replace afcarsid = "BLANK" if afcarsid == "" | afcarsid == " " 
egen new_chid = group(state chid afcarsid), autotype
rename chid old_chid
rename new_chid chid

// Report-Age Discrepancies

gen age = chage

replace age = 0 if age == 77 // unborn
replace age = . if age > 30

bys state chid (rptdt): gen rpt_yr_diff = round((rptdt - rptdt[_n-1])/365,1)
bys state chid (rptdt): gen age_yr_diff = age - age[_n-1]
gen rpt_age_diff = abs(rpt_yr_diff - age_yr_diff)

bys state chid: egen flag = max(rpt_age_diff)
gen flag_new_chid = flag >= 2 & flag !=.
drop flag *_diff


drop if staterr == "XX" | staterr == "PR"
compress
save "${intpath}/NCANDS_imported.dta", replace

}

**************************
**/ Clean Data
**************************

if `clean' == 1 {

use "${intpath}/NCANDS_imported.dta", clear

rename staterr state
rename rpdispdt rptdispdt
rename cethn chethn

**************************
** Keep Vars 
**************************

keep rptyr state chid chage chsex chrac* chethn *dt rptdisp fostercr chmal* mal* rptvictim rptfips flag_* 

**************************
** Clean Vars
**************************

drop if flag_new_chid == 1
label variable chid "Combo Child ID"

duplicates drop

* Make sure all unknowns are coded as 9
foreach var of varlist chrac* chethn {
	replace `var' = 9 if `var' == .
}

* Demographics
gen wh_only  = chracwh == 1 & chethn !=1 & chracai != 1 & chracbl != 1 & chracnh !=1 & chracud != 1 // White only
gen black = chracbl == 1

* Sex
gen female = chsex == 2
replace female = . if chsex !=1 & chsex !=2

* Outcome
gen substantiated_rpt = inlist(rptdisp, 1,2,3)
replace substantiated_rpt = . if rptdisp == 99

gen substantiated = inlist(mal1lev, 1, 2, 3) | inlist(mal2lev, 1, 2, 3) | inlist(mal3lev, 1, 2, 3) | inlist(mal4lev, 1, 2, 3)

gen victim = rptvictim == 1

sort state chid rptdt

bys state chid (rptdt): gen case_num = _n // want to fix the order of the cases in the event that there are same day cases

bys state chid (case_num): gen victim_wi_6m = rptdt[_n+1] - rptdt < 182 & rptdt[_n+1]-rptdt != . & victim[_n+1] == 1 

foreach j of numlist 2(1)6 {
bys state chid (case_num): gen inv_wi_`j'm = rptdt[_n+1] - rptdt[_n] < `j'*30.2 & rptdt[_n+1] - rptdt[_n] != . 
bys state chid (case_num): gen substantiated_wi_`j'm = rptdt[_n+1] - rptdt[_n] < `j'*30.2 & rptdt[_n+1] - rptdt[_n] != . & substantiated[_n+1] == 1 
bys state chid (case_num): gen placed_wi_`j'm = rptdt[_n+1] - rptdt[_n] < `j'*30.2 & rptdt[_n+1] - rptdt[_n] != . & fostercr[_n+1] == 1
}

foreach j of numlist 2(1)6 {
replace inv_wi_`j'm = . if fostercr==1
replace substantiated_wi_`j'm = . if fostercr==1
replace placed_wi_`j'm = . if fostercr==1
}

* Placed in Care
gen fc = fostercr == 1

**************************
** Focal Cases
**************************

gen focal = .
gen resolved = .
gen days_from_prior_focal = .

order chid rptdt days_from_prior_focal focal resolved

quietly sum rptdt
local totalobs = r(N)

quietly sum resolved
local resolvedobs = r(N)

bys state chid (case_num): replace focal = 1 if _n == 1 // first focal

while `resolvedobs' < `totalobs' {
local progress = `resolvedobs'/`totalobs' * 100
di "Progress: `progress'%"

bys state chid: egen last_focal_t = max(rptdt) if focal == 1 // find most recent focal date
bys state chid: egen last_focal = max(last_focal_t) // this is just to have the above for every row per child

replace days_from_prior_focal = rptdt-last_focal

replace resolved = 1 if days_from_prior_focal<=365 // reports less than a year from most recent focal are "resolved"
bys state chid resolved (case_num): replace focal = 1 if _n == 1 & resolved == . // the first observation that is still not resolved is >365 days after the most recent focal observation

drop last_focal*

quietly sum resolved
local resolvedobs = r(N)
}

drop days_from_prior_focal resolved

replace focal = 0 if focal == .

**************************
** Sample Restrictions 
**************************

* drop if report happened after Dec. 31, 2019; they were only included so we could see subsequent maltreatment
drop if rptdt > date("20191231", "YMD")

** Get Ns Before Drops
count // number of cases
count if focal == 1 // number of focal cases
egen tag = tag(chid)
count if tag // number of kids
drop tag

drop if maldeath == 1

** Drop 17+ because we can't see subsequent cases past this
keep if chage < 17 

** Drop Sexual Abuse Cases from Focal
gen sexualabuse = inlist(chmal1, 4) | inlist(chmal2, 4) | inlist(chmal3, 4) | inlist(chmal4, 4)
replace focal = . if focal == 1 & sexualabuse == 1 // just N/A-ing the focal cases that are sexual abuse but not dropping

sort chid case_num
order chid case_num rptdt

save "${cleanpath}/NCANDS_clean.dta", replace

}

**************************
** Produce State Dataset
**************************

if `makelongstate' == 1 {

// State Crosswalk

use "${cleanpath}/NCANDS_clean.dta", clear

keep if focal == 1
		
keep if wh_only == 1 | black == 1
assert wh_only != black

bys state rptyr: egen tester = min(chracwh) // to figure out if there's any race data at all
drop if tester >=3 | tester == . // all unknown or all missing
drop tester

bys state rptyr: egen tester = max(fc) // to figure out if there's any foster care data at all
drop if tester == 0 // all unknown or all missing
drop tester

levelsof state, local(states) clean
local states USA USA_balanced `states'
di "`states'"

foreach state of local states {
	di "`state'"
}

tempfile states_with_data
save `states_with_data'

keep state rptyr
rename rptyr year
duplicates drop

bys state: gen count = _N
gen us_balanced = count == 12
drop count 
rename year rptyr

merge 1:m state rptyr using `states_with_data', nogen
tempfile states_with_data
save `states_with_data'

// Gather Statistics

foreach state of local states { 
use `states_with_data', clear

if "`state'" != "USA" & "`state'" != "USA_balanced" { // if we want national average then we don't do anything
keep if state == "`state'"
}

if "`state'" == "USA_balanced" { // balanced set of states
keep if us_balanced == 1
}

local realrow = 1

matrix realmatrix = J(1,187,.)

		preserve
		
		di "`state'" 
		
		tempfile basefile // already has the year and state restriction, and kept only focals
		save `basefile'
		
		sum black
		local bshare = r(mean)
		
		sum fc
		local D = r(mean)
		local N = r(N)
		
		sum fc if black == 0
		local D_w = r(mean)
		local N_w = r(N)		
		
		sum fc if black == 1
		local D_b = r(mean)
		local N_b = r(N)		

		matrix realmatrix[`realrow',62]=`D'
		matrix realmatrix[`realrow',80] = sqrt((`D'*(1-`D'))/`N')
		matrix realmatrix[`realrow',63]=`D_w'
		matrix realmatrix[`realrow',81] = sqrt((`D_w'*(1-`D_w'))/`N_w')
		matrix realmatrix[`realrow',64]=`D_b'
		matrix realmatrix[`realrow',82] = sqrt((`D_b'*(1-`D_b'))/`N_b')
				
		foreach j of numlist 2(1)6 {
		sum inv_wi_`j'm if fc != 1
		matrix realmatrix[`realrow',63+`j']=r(mean)
				
		sum inv_wi_`j'm if black == 0 & fc != 1
		matrix realmatrix[`realrow',68+`j']=r(mean)
			
		sum inv_wi_`j'm if black == 1 & fc != 1
		matrix realmatrix[`realrow',73+`j']=r(mean)
		
		}
		
		sum inv_wi_6m if black == 0
		local Y_w = r(mean)
		sum inv_wi_6m if black == 1
		local Y_b = r(mean)

		local white_lower = `Y_w' 
		local white_upper = 1-((1-`D_w')*(1-`Y_w')) 
		local black_lower = `Y_b'
		local black_upper = 1-((1-`D_b')*(1-`Y_b'))
		
		di "`black_lower'"
		di "`black_upper'"
		di "`white_lower'"
		di "`white_upper'"

		matrix tempmatrix = J(11000,13,.)
		local row = 1
		forval ate_b=`black_lower'(0.004)`black_upper' { 
			forval ate_w=`white_lower'(0.004)`white_upper' { 
			
			local ate = `ate_b'*`bshare'+`ate_w'*(1-`bshare')

			local j0 = ((1-`white_lower')*(1-`D_w')/(1-`ate_w'))-((1-`black_lower')*(1-`D_b')/(1-`ate_b'))
			local j1 = (`white_lower'*(1-`D_w')/`ate_w') - (`black_lower'*(1-`D_b')/`ate_b')

			local hyp_adj_gap = (`j0'*(1-`ate'))+(`j1'*`ate')

			matrix tempmatrix[`row',1]=`ate_b'
			matrix tempmatrix[`row',2]=`ate_w'
			matrix tempmatrix[`row',3]=`hyp_adj_gap'
			matrix tempmatrix[`row',4]=`j0' // conditional UD, unweighted
			matrix tempmatrix[`row',5]=`j1'	
			matrix tempmatrix[`row',6]=`j0'*(1-`ate') // conditional UD, weighted 
			matrix tempmatrix[`row',7]=`j1'*`ate'	
			matrix tempmatrix[`row',8]=1-((1-`black_lower')*(1-`D_b')/(1-`ate_b'))
			matrix tempmatrix[`row',9]=1-((1-`white_lower')*(1-`D_w')/(1-`ate_w'))
			matrix tempmatrix[`row',10]=1-(`black_lower'*(1-`D_b')/`ate_b')
			matrix tempmatrix[`row',11]=1-(`white_lower'*(1-`D_w')/`ate_w')
			local ++row

			}
		}
		clear
		svmat tempmatrix
	
		drop if tempmatrix1 == . 

		sum tempmatrix3 // Overall UD estimates
		matrix realmatrix[`realrow',3]=r(mean)
		
		sum tempmatrix4 // Y=0 unweighted
		matrix realmatrix[`realrow',7]=r(mean)
		
		sum tempmatrix5 // Y=1 unweighted
		matrix realmatrix[`realrow',11]=r(mean)
				
		sum tempmatrix8  // Black Placement Rate, Y=0
		matrix realmatrix[`realrow',117]=r(mean)
		
		sum tempmatrix9  // White Placement Rate, Y=0
		matrix realmatrix[`realrow',121]=r(mean)
		
		sum tempmatrix10  // Black Placement Rate, Y=1
		matrix realmatrix[`realrow',125]=r(mean)
		
		sum tempmatrix11  // White Placement Rate, Y=1
		matrix realmatrix[`realrow',129]=r(mean)
		
		replace tempmatrix12 = `bshare'*tempmatrix8 + (1-`bshare')*tempmatrix9
		sum tempmatrix12  // Overall Placement Rate, Y=0
		matrix realmatrix[`realrow',165]=r(mean)
		
		replace tempmatrix13 = `bshare'*tempmatrix10 + (1-`bshare')*tempmatrix11		
		sum tempmatrix13  // Overall Placement Rate, Y=1
		matrix realmatrix[`realrow',169]=r(mean)

**************************
** Y* contour graphs
**************************

if "`state'" == "MI"{
* Overall
		set scheme plotplain

		di "`black_lower'"
		di "`black_upper'"
		di "`white_lower'"
		di "`white_upper'"

		twoway contour tempmatrix3 tempmatrix1 tempmatrix2, ccuts(.009 .010 .011 .012) zlab(#3, format(%9.3f))  xlab(.22(.005).235) ylab(.2(.005).22) ///
		ztitle("System-Wide Unwarranted Disparity") xtitle("White Mean Risk") ytitle("Black Mean Risk") ///
		scolor(gold) ecolor(dkorange) //
				
		graph export "${outpath}/MI.pdf", replace
* Y*=0	
		twoway contour tempmatrix4 tempmatrix1 tempmatrix2, zlab(#4, format(%9.2f))  xlab(.22(.005).235) ylab(.2(.005).22) ///
		ztitle("Unwarranted Disparity, Y*=0") xtitle("White Mean Risk") ytitle("Black Mean Risk")  ///
		ccolors("208 239 255" "250 245 145" "255 195 52") //
		
		graph export "${outpath}/MI_y0.pdf", replace

* Y*=1
		twoway contour tempmatrix5 tempmatrix1 tempmatrix2, zlab(#3, format(%9.2f))  xlab(.22(.005).235) ylab(.2(.005).22) ///
		ztitle("Unwarranted Disparity, Y*=1") xtitle("White Mean Risk") ytitle("Black Mean Risk") ///
		ccolors("208 239 255" "250 245 145" "255 195 52") //
		
		graph export "${outpath}/MI_y1.pdf", replace
}

	local ++realrow
	restore

	clear
	svmat realmatrix

	rename realmatrix1 year
	rename realmatrix3 re_inv_mean
	rename realmatrix7 re_inv_y0_uw_mean
	rename realmatrix11 re_inv_y1_uw_mean
	rename realmatrix62 fc_all
	rename realmatrix63 fc_white
	rename realmatrix64 fc_black
	rename realmatrix65 inv_wi_2m_all
	rename realmatrix66 inv_wi_3m_all
	rename realmatrix67 inv_wi_4m_all
	rename realmatrix68 inv_wi_5m_all
	rename realmatrix69 inv_wi_6m_all
	rename realmatrix70 inv_wi_2m_white
	rename realmatrix71 inv_wi_3m_white
	rename realmatrix72 inv_wi_4m_white
	rename realmatrix73 inv_wi_5m_white
	rename realmatrix74 inv_wi_6m_white
	rename realmatrix75 inv_wi_2m_black
	rename realmatrix76 inv_wi_3m_black
	rename realmatrix77 inv_wi_4m_black
	rename realmatrix78 inv_wi_5m_black
	rename realmatrix79 inv_wi_6m_black
	rename realmatrix80 fc_all_se
	rename realmatrix81 fc_white_se
	rename realmatrix82 fc_black_se
	
	rename realmatrix117 fc_y0_reinv_black_mean
	rename realmatrix121 fc_y0_reinv_white_mean
	rename realmatrix125 fc_y1_reinv_black_mean
	rename realmatrix129 fc_y1_reinv_white_mean
	
	rename realmatrix165 fc_y0_reinv_all_mean
	rename realmatrix169 fc_y1_reinv_all_mean
	
	gen state = "`state'"
	tempfile allyear_`state'
	save `allyear_`state''
}
	drop if _n > 0 // dropping everything for the append
		foreach state of local states { 
		append using `allyear_`state''
		}
	order state year
	
* Label variables
	
	label var re_inv_mean "UD Mean Estimate"
	label var re_inv_y0_uw_mean "Y*=0 UD Mean Estimate, UW"
	label var re_inv_y1_uw_mean "Y*=1 UD Mean Estimate, UW"
	
	label var fc_y0_reinv_black_mean "Reinvestigation Y*=0 FC Mean Estimate, Black"
	label var fc_y0_reinv_white_mean "Reinvestigation Y*=0 FC Mean Estimate, White"
	label var fc_y1_reinv_black_mean "Reinvestigation Y*=1 FC Mean Estimate, Black"
	label var fc_y1_reinv_white_mean "Reinvestigation Y*=1 FC Mean Estimate, White"
	
	label var fc_y0_reinv_all_mean "Reinvestigation Y*=0 Mean Estimate, All"
	label var fc_y1_reinv_all_mean "Reinvestigation Y*=1 Mean Estimate, All"
	
	label var fc_all "Foster Care Removal Rate - All"
	label var fc_white "Foster Care Removal Rate - White"
	label var fc_black "Foster Care Removal Rate - Black"
	
	drop realmatrix*
	
	foreach var of varlist _all {
	capture assert mi(`var')
	if !_rc {
	drop `var'
	}
	}
	
	save "${cleanpath}/state_year_long_avgs.dta", replace

}

